In [1]:
#import the reqired libraries
import random
random.seed(0)
# Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization
from tensorflow.keras.layers import UpSampling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ZeroPadding2D, Convolution2D, MaxPooling2D, Dropout, Flatten 

from PIL import Image
from numpy import asarray

%tensorflow_version 2.x
import tensorflow
tensorflow.__version__

from google.colab.patches import cv2_imshow
# Load the Drive helper and mount
from google.colab import drive
from sklearn.model_selection import train_test_split
from zipfile import ZipFile

image.png

• DOMAIN: Entertainment

• CONTEXT: Company X owns a movie application and repository which caters movie streaming to millions of users who on subscription basis. Company wants to automate the process of cast and crew information in each scene from a movie such that when a user pauses on the movie and clicks on cast information button, the app will show details of the actor in the scene. Company has an in-house computer vision and multimedia experts who need to detect faces from screen shots from the movie scene.

• DATA DESCRIPTION: The dataset comprises of images and its mask where there is a human face.

• PROJECT OBJECTIVE: Face detection from training images.

Steps and tasks:

  1. Import the dataset.
  2. Create features (images) and labels (mask) using that data.
  3. Mask detection model: ● Design a face mask detection model. Hint: Use U-net along with pre-trained transfer learning models ● Design your own Dice Coefficient and Loss function. ● Train, tune and test the model. ● Evaluate the model using testing data.
  4. Use the “Prediction image” as an input to your designed model and display the output of the image.
In [2]:
# This will prompt for authorization.
drive.mount('/content/drive/')
!ls "/content/drive/MyDrive/Colab Notebooks/Adv_CV2"
Mounted at /content/drive/
'Part 1Test Data - Prediction Image.jpeg'
'Part 1- Train data - images.npy'
'Part 2- Test Image - Benedict Cumberbatch9.jpg'
'Part 2 - Test Image - Dwayne Johnson4.jpg'
'Part 3 - Aligned Face Dataset from Pinterest.zip'
'Part 3 - vgg_face_weights.h5'
 pins
 PINS
In [3]:
cd "/content/drive/MyDrive/Colab Notebooks/Adv_CV2"
/content/drive/MyDrive/Colab Notebooks/Adv_CV2
In [4]:
# Loading the images file
data = np.load('Part 1- Train data - images.npy', allow_pickle=True)
In [5]:
print('size:',data.size)
print('shape', data.shape)
size: 818
shape (409, 2)
In [6]:
cv2_imshow(data[200][0])
In [7]:
fi,axis = plt.subplots(10,3,figsize=(20,30))
row = 0
col = 0
index = 0
for i in range(30):
  axis[row][col].imshow(data[index][0], interpolation='nearest')
  index = index + 12
  col = col + 1
  if col > 2:
    row = row + 1
    col = 0
plt.show()
In [8]:
#Declare a variable IMAGE_SIZE = 224 as we will be taking Input shape as 224 * 224
IMAGE_SIZE = 224
In [9]:
masks = np.zeros((int(data.shape[0]), IMAGE_SIZE, IMAGE_SIZE))
X = np.zeros((int(data.shape[0]),IMAGE_SIZE, IMAGE_SIZE, 3))

for index in range(data.shape[0]):
  img = data[index][0]
  image = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
 
  #only taking first 3 channel of each image
  try:
    image = image[:,:,:3]
  except:
    print(f"Number {index} is Grayscale image, converting to RGB to make it standard in data")
    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    continue
  X[index] = preprocess_input(np.array(image, dtype=np.float32)) # Convert to float32 array

  for i in data[index][1]:
    x1 = int(i['points'][0]['x'] * IMAGE_SIZE)
    x2 = int(i['points'][1]['x'] * IMAGE_SIZE)
    y1 = int(i['points'][0]['y'] * IMAGE_SIZE)
    y2 = int(i['points'][1]['y'] * IMAGE_SIZE)

    masks[index][y1:y2, x1:x2] = 1
print(f"Shape of X is '{X.shape}' and the shape of mask dataset is '{masks.shape}' ")
Number 272 is Grayscale image, converting to RGB to make it standard in data
Shape of X is '(409, 224, 224, 3)' and the shape of mask dataset is '(409, 224, 224)' 
In [10]:
#split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, masks, test_size=0.2)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.2)

print(f"Shape of X_train is '{X_train.shape}' and the shape of y_train is '{y_train.shape}'")
print(f"Shape of X_val is '{X_val.shape}' and the shape of y_val is '{y_val.shape}'")
print(f"Shape of X_test is '{X_test.shape}' and the shape of y_test is '{y_test.shape}'")
Shape of X_train is '(327, 224, 224, 3)' and the shape of y_train is '(327, 224, 224)'
Shape of X_val is '(65, 224, 224, 3)' and the shape of y_val is '(65, 224, 224)'
Shape of X_test is '(17, 224, 224, 3)' and the shape of y_test is '(17, 224, 224)'
In [11]:
#view a train image
fig = plt.figure(figsize=(15, 15))
a = fig.add_subplot(1, 4, 1)
imgplot = plt.imshow(X_train[0], cmap='gray')

a = fig.add_subplot(1, 4, 2)
imgplot = plt.imshow(X_train[10])
imgplot.set_clim(0.0, 0.7)

a = fig.add_subplot(1, 4, 3)
imgplot = plt.imshow(X_train[20])
imgplot.set_clim(0.0, 1.4)

a = fig.add_subplot(1, 4, 4)
imgplot = plt.imshow(X_train[30])
imgplot.set_clim(0.0, 2.1)

fig = plt.figure(figsize=(15, 15))
a = fig.add_subplot(1, 4, 1)
imgplot = plt.imshow(y_train[0])

a = fig.add_subplot(1, 4, 2)
imgplot = plt.imshow(y_train[10])
imgplot.set_clim(0.0, 0.7)

a = fig.add_subplot(1, 4, 3)
imgplot = plt.imshow(y_train[20])
imgplot.set_clim(0.0, 1.4)

a = fig.add_subplot(1, 4, 4)
imgplot = plt.imshow(y_train[30])
imgplot.set_clim(0.0, 1.4)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
In [12]:
def create_model(trainable=True):
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")

    ALPHA = 0.35 # Width hyper parameter for MobileNet (0.25, 0.5, 0.75, 1.0). Higher width means more accurate but slower

    # Do not include classification (top) layer
    model = MobileNetV2(input_tensor=inputs, weights="imagenet", include_top=False, alpha=ALPHA)
    skip_connection_names = ["input_image", "block_1_expand_relu", "block_3_expand_relu", "block_6_expand_relu"]
    encoder_output = model.get_layer("block_13_expand_relu").output

    f = [16, 32, 48, 64]
    x = encoder_output
    for i in range(1, len(skip_connection_names)+1, 1):
        x_skip = model.get_layer(skip_connection_names[-i]).output
        x = UpSampling2D((2, 2))(x)
        x = Concatenate()([x, x_skip])
        
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        
    x = Conv2D(1, (1, 1), padding="same")(x)
    x = Activation("sigmoid")(x)
    
    model = Model(inputs, x)
    return model
In [13]:
model = create_model()
model.summary()
WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not in [96, 128, 160, 192, 224]. Weights for input shape (224, 224) will be loaded as the default.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_0.35_224_no_top.h5
2023424/2019640 [==============================] - 0s 0us/step
2031616/2019640 [==============================] - 0s 0us/step
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_image (InputLayer)        [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 16) 432         input_image[0][0]                
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 16) 64          Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 112, 112, 16) 0           bn_Conv1[0][0]                   
__________________________________________________________________________________________________
expanded_conv_depthwise (Depthw (None, 112, 112, 16) 144         Conv1_relu[0][0]                 
__________________________________________________________________________________________________
expanded_conv_depthwise_BN (Bat (None, 112, 112, 16) 64          expanded_conv_depthwise[0][0]    
__________________________________________________________________________________________________
expanded_conv_depthwise_relu (R (None, 112, 112, 16) 0           expanded_conv_depthwise_BN[0][0] 
__________________________________________________________________________________________________
expanded_conv_project (Conv2D)  (None, 112, 112, 8)  128         expanded_conv_depthwise_relu[0][0
__________________________________________________________________________________________________
expanded_conv_project_BN (Batch (None, 112, 112, 8)  32          expanded_conv_project[0][0]      
__________________________________________________________________________________________________
block_1_expand (Conv2D)         (None, 112, 112, 48) 384         expanded_conv_project_BN[0][0]   
__________________________________________________________________________________________________
block_1_expand_BN (BatchNormali (None, 112, 112, 48) 192         block_1_expand[0][0]             
__________________________________________________________________________________________________
block_1_expand_relu (ReLU)      (None, 112, 112, 48) 0           block_1_expand_BN[0][0]          
__________________________________________________________________________________________________
block_1_pad (ZeroPadding2D)     (None, 113, 113, 48) 0           block_1_expand_relu[0][0]        
__________________________________________________________________________________________________
block_1_depthwise (DepthwiseCon (None, 56, 56, 48)   432         block_1_pad[0][0]                
__________________________________________________________________________________________________
block_1_depthwise_BN (BatchNorm (None, 56, 56, 48)   192         block_1_depthwise[0][0]          
__________________________________________________________________________________________________
block_1_depthwise_relu (ReLU)   (None, 56, 56, 48)   0           block_1_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_1_project (Conv2D)        (None, 56, 56, 8)    384         block_1_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_1_project_BN (BatchNormal (None, 56, 56, 8)    32          block_1_project[0][0]            
__________________________________________________________________________________________________
block_2_expand (Conv2D)         (None, 56, 56, 48)   384         block_1_project_BN[0][0]         
__________________________________________________________________________________________________
block_2_expand_BN (BatchNormali (None, 56, 56, 48)   192         block_2_expand[0][0]             
__________________________________________________________________________________________________
block_2_expand_relu (ReLU)      (None, 56, 56, 48)   0           block_2_expand_BN[0][0]          
__________________________________________________________________________________________________
block_2_depthwise (DepthwiseCon (None, 56, 56, 48)   432         block_2_expand_relu[0][0]        
__________________________________________________________________________________________________
block_2_depthwise_BN (BatchNorm (None, 56, 56, 48)   192         block_2_depthwise[0][0]          
__________________________________________________________________________________________________
block_2_depthwise_relu (ReLU)   (None, 56, 56, 48)   0           block_2_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_2_project (Conv2D)        (None, 56, 56, 8)    384         block_2_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_2_project_BN (BatchNormal (None, 56, 56, 8)    32          block_2_project[0][0]            
__________________________________________________________________________________________________
block_2_add (Add)               (None, 56, 56, 8)    0           block_1_project_BN[0][0]         
                                                                 block_2_project_BN[0][0]         
__________________________________________________________________________________________________
block_3_expand (Conv2D)         (None, 56, 56, 48)   384         block_2_add[0][0]                
__________________________________________________________________________________________________
block_3_expand_BN (BatchNormali (None, 56, 56, 48)   192         block_3_expand[0][0]             
__________________________________________________________________________________________________
block_3_expand_relu (ReLU)      (None, 56, 56, 48)   0           block_3_expand_BN[0][0]          
__________________________________________________________________________________________________
block_3_pad (ZeroPadding2D)     (None, 57, 57, 48)   0           block_3_expand_relu[0][0]        
__________________________________________________________________________________________________
block_3_depthwise (DepthwiseCon (None, 28, 28, 48)   432         block_3_pad[0][0]                
__________________________________________________________________________________________________
block_3_depthwise_BN (BatchNorm (None, 28, 28, 48)   192         block_3_depthwise[0][0]          
__________________________________________________________________________________________________
block_3_depthwise_relu (ReLU)   (None, 28, 28, 48)   0           block_3_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_3_project (Conv2D)        (None, 28, 28, 16)   768         block_3_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_3_project_BN (BatchNormal (None, 28, 28, 16)   64          block_3_project[0][0]            
__________________________________________________________________________________________________
block_4_expand (Conv2D)         (None, 28, 28, 96)   1536        block_3_project_BN[0][0]         
__________________________________________________________________________________________________
block_4_expand_BN (BatchNormali (None, 28, 28, 96)   384         block_4_expand[0][0]             
__________________________________________________________________________________________________
block_4_expand_relu (ReLU)      (None, 28, 28, 96)   0           block_4_expand_BN[0][0]          
__________________________________________________________________________________________________
block_4_depthwise (DepthwiseCon (None, 28, 28, 96)   864         block_4_expand_relu[0][0]        
__________________________________________________________________________________________________
block_4_depthwise_BN (BatchNorm (None, 28, 28, 96)   384         block_4_depthwise[0][0]          
__________________________________________________________________________________________________
block_4_depthwise_relu (ReLU)   (None, 28, 28, 96)   0           block_4_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_4_project (Conv2D)        (None, 28, 28, 16)   1536        block_4_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_4_project_BN (BatchNormal (None, 28, 28, 16)   64          block_4_project[0][0]            
__________________________________________________________________________________________________
block_4_add (Add)               (None, 28, 28, 16)   0           block_3_project_BN[0][0]         
                                                                 block_4_project_BN[0][0]         
__________________________________________________________________________________________________
block_5_expand (Conv2D)         (None, 28, 28, 96)   1536        block_4_add[0][0]                
__________________________________________________________________________________________________
block_5_expand_BN (BatchNormali (None, 28, 28, 96)   384         block_5_expand[0][0]             
__________________________________________________________________________________________________
block_5_expand_relu (ReLU)      (None, 28, 28, 96)   0           block_5_expand_BN[0][0]          
__________________________________________________________________________________________________
block_5_depthwise (DepthwiseCon (None, 28, 28, 96)   864         block_5_expand_relu[0][0]        
__________________________________________________________________________________________________
block_5_depthwise_BN (BatchNorm (None, 28, 28, 96)   384         block_5_depthwise[0][0]          
__________________________________________________________________________________________________
block_5_depthwise_relu (ReLU)   (None, 28, 28, 96)   0           block_5_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_5_project (Conv2D)        (None, 28, 28, 16)   1536        block_5_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_5_project_BN (BatchNormal (None, 28, 28, 16)   64          block_5_project[0][0]            
__________________________________________________________________________________________________
block_5_add (Add)               (None, 28, 28, 16)   0           block_4_add[0][0]                
                                                                 block_5_project_BN[0][0]         
__________________________________________________________________________________________________
block_6_expand (Conv2D)         (None, 28, 28, 96)   1536        block_5_add[0][0]                
__________________________________________________________________________________________________
block_6_expand_BN (BatchNormali (None, 28, 28, 96)   384         block_6_expand[0][0]             
__________________________________________________________________________________________________
block_6_expand_relu (ReLU)      (None, 28, 28, 96)   0           block_6_expand_BN[0][0]          
__________________________________________________________________________________________________
block_6_pad (ZeroPadding2D)     (None, 29, 29, 96)   0           block_6_expand_relu[0][0]        
__________________________________________________________________________________________________
block_6_depthwise (DepthwiseCon (None, 14, 14, 96)   864         block_6_pad[0][0]                
__________________________________________________________________________________________________
block_6_depthwise_BN (BatchNorm (None, 14, 14, 96)   384         block_6_depthwise[0][0]          
__________________________________________________________________________________________________
block_6_depthwise_relu (ReLU)   (None, 14, 14, 96)   0           block_6_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_6_project (Conv2D)        (None, 14, 14, 24)   2304        block_6_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_6_project_BN (BatchNormal (None, 14, 14, 24)   96          block_6_project[0][0]            
__________________________________________________________________________________________________
block_7_expand (Conv2D)         (None, 14, 14, 144)  3456        block_6_project_BN[0][0]         
__________________________________________________________________________________________________
block_7_expand_BN (BatchNormali (None, 14, 14, 144)  576         block_7_expand[0][0]             
__________________________________________________________________________________________________
block_7_expand_relu (ReLU)      (None, 14, 14, 144)  0           block_7_expand_BN[0][0]          
__________________________________________________________________________________________________
block_7_depthwise (DepthwiseCon (None, 14, 14, 144)  1296        block_7_expand_relu[0][0]        
__________________________________________________________________________________________________
block_7_depthwise_BN (BatchNorm (None, 14, 14, 144)  576         block_7_depthwise[0][0]          
__________________________________________________________________________________________________
block_7_depthwise_relu (ReLU)   (None, 14, 14, 144)  0           block_7_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_7_project (Conv2D)        (None, 14, 14, 24)   3456        block_7_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_7_project_BN (BatchNormal (None, 14, 14, 24)   96          block_7_project[0][0]            
__________________________________________________________________________________________________
block_7_add (Add)               (None, 14, 14, 24)   0           block_6_project_BN[0][0]         
                                                                 block_7_project_BN[0][0]         
__________________________________________________________________________________________________
block_8_expand (Conv2D)         (None, 14, 14, 144)  3456        block_7_add[0][0]                
__________________________________________________________________________________________________
block_8_expand_BN (BatchNormali (None, 14, 14, 144)  576         block_8_expand[0][0]             
__________________________________________________________________________________________________
block_8_expand_relu (ReLU)      (None, 14, 14, 144)  0           block_8_expand_BN[0][0]          
__________________________________________________________________________________________________
block_8_depthwise (DepthwiseCon (None, 14, 14, 144)  1296        block_8_expand_relu[0][0]        
__________________________________________________________________________________________________
block_8_depthwise_BN (BatchNorm (None, 14, 14, 144)  576         block_8_depthwise[0][0]          
__________________________________________________________________________________________________
block_8_depthwise_relu (ReLU)   (None, 14, 14, 144)  0           block_8_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_8_project (Conv2D)        (None, 14, 14, 24)   3456        block_8_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_8_project_BN (BatchNormal (None, 14, 14, 24)   96          block_8_project[0][0]            
__________________________________________________________________________________________________
block_8_add (Add)               (None, 14, 14, 24)   0           block_7_add[0][0]                
                                                                 block_8_project_BN[0][0]         
__________________________________________________________________________________________________
block_9_expand (Conv2D)         (None, 14, 14, 144)  3456        block_8_add[0][0]                
__________________________________________________________________________________________________
block_9_expand_BN (BatchNormali (None, 14, 14, 144)  576         block_9_expand[0][0]             
__________________________________________________________________________________________________
block_9_expand_relu (ReLU)      (None, 14, 14, 144)  0           block_9_expand_BN[0][0]          
__________________________________________________________________________________________________
block_9_depthwise (DepthwiseCon (None, 14, 14, 144)  1296        block_9_expand_relu[0][0]        
__________________________________________________________________________________________________
block_9_depthwise_BN (BatchNorm (None, 14, 14, 144)  576         block_9_depthwise[0][0]          
__________________________________________________________________________________________________
block_9_depthwise_relu (ReLU)   (None, 14, 14, 144)  0           block_9_depthwise_BN[0][0]       
__________________________________________________________________________________________________
block_9_project (Conv2D)        (None, 14, 14, 24)   3456        block_9_depthwise_relu[0][0]     
__________________________________________________________________________________________________
block_9_project_BN (BatchNormal (None, 14, 14, 24)   96          block_9_project[0][0]            
__________________________________________________________________________________________________
block_9_add (Add)               (None, 14, 14, 24)   0           block_8_add[0][0]                
                                                                 block_9_project_BN[0][0]         
__________________________________________________________________________________________________
block_10_expand (Conv2D)        (None, 14, 14, 144)  3456        block_9_add[0][0]                
__________________________________________________________________________________________________
block_10_expand_BN (BatchNormal (None, 14, 14, 144)  576         block_10_expand[0][0]            
__________________________________________________________________________________________________
block_10_expand_relu (ReLU)     (None, 14, 14, 144)  0           block_10_expand_BN[0][0]         
__________________________________________________________________________________________________
block_10_depthwise (DepthwiseCo (None, 14, 14, 144)  1296        block_10_expand_relu[0][0]       
__________________________________________________________________________________________________
block_10_depthwise_BN (BatchNor (None, 14, 14, 144)  576         block_10_depthwise[0][0]         
__________________________________________________________________________________________________
block_10_depthwise_relu (ReLU)  (None, 14, 14, 144)  0           block_10_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_10_project (Conv2D)       (None, 14, 14, 32)   4608        block_10_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_10_project_BN (BatchNorma (None, 14, 14, 32)   128         block_10_project[0][0]           
__________________________________________________________________________________________________
block_11_expand (Conv2D)        (None, 14, 14, 192)  6144        block_10_project_BN[0][0]        
__________________________________________________________________________________________________
block_11_expand_BN (BatchNormal (None, 14, 14, 192)  768         block_11_expand[0][0]            
__________________________________________________________________________________________________
block_11_expand_relu (ReLU)     (None, 14, 14, 192)  0           block_11_expand_BN[0][0]         
__________________________________________________________________________________________________
block_11_depthwise (DepthwiseCo (None, 14, 14, 192)  1728        block_11_expand_relu[0][0]       
__________________________________________________________________________________________________
block_11_depthwise_BN (BatchNor (None, 14, 14, 192)  768         block_11_depthwise[0][0]         
__________________________________________________________________________________________________
block_11_depthwise_relu (ReLU)  (None, 14, 14, 192)  0           block_11_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_11_project (Conv2D)       (None, 14, 14, 32)   6144        block_11_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_11_project_BN (BatchNorma (None, 14, 14, 32)   128         block_11_project[0][0]           
__________________________________________________________________________________________________
block_11_add (Add)              (None, 14, 14, 32)   0           block_10_project_BN[0][0]        
                                                                 block_11_project_BN[0][0]        
__________________________________________________________________________________________________
block_12_expand (Conv2D)        (None, 14, 14, 192)  6144        block_11_add[0][0]               
__________________________________________________________________________________________________
block_12_expand_BN (BatchNormal (None, 14, 14, 192)  768         block_12_expand[0][0]            
__________________________________________________________________________________________________
block_12_expand_relu (ReLU)     (None, 14, 14, 192)  0           block_12_expand_BN[0][0]         
__________________________________________________________________________________________________
block_12_depthwise (DepthwiseCo (None, 14, 14, 192)  1728        block_12_expand_relu[0][0]       
__________________________________________________________________________________________________
block_12_depthwise_BN (BatchNor (None, 14, 14, 192)  768         block_12_depthwise[0][0]         
__________________________________________________________________________________________________
block_12_depthwise_relu (ReLU)  (None, 14, 14, 192)  0           block_12_depthwise_BN[0][0]      
__________________________________________________________________________________________________
block_12_project (Conv2D)       (None, 14, 14, 32)   6144        block_12_depthwise_relu[0][0]    
__________________________________________________________________________________________________
block_12_project_BN (BatchNorma (None, 14, 14, 32)   128         block_12_project[0][0]           
__________________________________________________________________________________________________
block_12_add (Add)              (None, 14, 14, 32)   0           block_11_add[0][0]               
                                                                 block_12_project_BN[0][0]        
__________________________________________________________________________________________________
block_13_expand (Conv2D)        (None, 14, 14, 192)  6144        block_12_add[0][0]               
__________________________________________________________________________________________________
block_13_expand_BN (BatchNormal (None, 14, 14, 192)  768         block_13_expand[0][0]            
__________________________________________________________________________________________________
block_13_expand_relu (ReLU)     (None, 14, 14, 192)  0           block_13_expand_BN[0][0]         
__________________________________________________________________________________________________
up_sampling2d (UpSampling2D)    (None, 28, 28, 192)  0           block_13_expand_relu[0][0]       
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 28, 28, 288)  0           up_sampling2d[0][0]              
                                                                 block_6_expand_relu[0][0]        
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 28, 28, 64)   165952      concatenate[0][0]                
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 28, 28, 64)   256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 28, 28, 64)   0           batch_normalization[0][0]        
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 28, 28, 64)   36928       activation[0][0]                 
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 28, 28, 64)   256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 28, 28, 64)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
up_sampling2d_1 (UpSampling2D)  (None, 56, 56, 64)   0           activation_1[0][0]               
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 56, 56, 112)  0           up_sampling2d_1[0][0]            
                                                                 block_3_expand_relu[0][0]        
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 56, 56, 48)   48432       concatenate_1[0][0]              
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 56, 56, 48)   192         conv2d_2[0][0]                   
__________________________________________________________________________________________________
activation_2 (Activation)       (None, 56, 56, 48)   0           batch_normalization_2[0][0]      
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 56, 56, 48)   20784       activation_2[0][0]               
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 56, 56, 48)   192         conv2d_3[0][0]                   
__________________________________________________________________________________________________
activation_3 (Activation)       (None, 56, 56, 48)   0           batch_normalization_3[0][0]      
__________________________________________________________________________________________________
up_sampling2d_2 (UpSampling2D)  (None, 112, 112, 48) 0           activation_3[0][0]               
__________________________________________________________________________________________________
concatenate_2 (Concatenate)     (None, 112, 112, 96) 0           up_sampling2d_2[0][0]            
                                                                 block_1_expand_relu[0][0]        
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 112, 112, 32) 27680       concatenate_2[0][0]              
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 112, 112, 32) 128         conv2d_4[0][0]                   
__________________________________________________________________________________________________
activation_4 (Activation)       (None, 112, 112, 32) 0           batch_normalization_4[0][0]      
__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 112, 112, 32) 9248        activation_4[0][0]               
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 112, 112, 32) 128         conv2d_5[0][0]                   
__________________________________________________________________________________________________
activation_5 (Activation)       (None, 112, 112, 32) 0           batch_normalization_5[0][0]      
__________________________________________________________________________________________________
up_sampling2d_3 (UpSampling2D)  (None, 224, 224, 32) 0           activation_5[0][0]               
__________________________________________________________________________________________________
concatenate_3 (Concatenate)     (None, 224, 224, 35) 0           up_sampling2d_3[0][0]            
                                                                 input_image[0][0]                
__________________________________________________________________________________________________
conv2d_6 (Conv2D)               (None, 224, 224, 16) 5056        concatenate_3[0][0]              
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 224, 224, 16) 64          conv2d_6[0][0]                   
__________________________________________________________________________________________________
activation_6 (Activation)       (None, 224, 224, 16) 0           batch_normalization_6[0][0]      
__________________________________________________________________________________________________
conv2d_7 (Conv2D)               (None, 224, 224, 16) 2320        activation_6[0][0]               
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 224, 224, 16) 64          conv2d_7[0][0]                   
__________________________________________________________________________________________________
activation_7 (Activation)       (None, 224, 224, 16) 0           batch_normalization_7[0][0]      
__________________________________________________________________________________________________
conv2d_8 (Conv2D)               (None, 224, 224, 1)  17          activation_7[0][0]               
__________________________________________________________________________________________________
activation_8 (Activation)       (None, 224, 224, 1)  0           conv2d_8[0][0]                   
==================================================================================================
Total params: 416,209
Trainable params: 409,025
Non-trainable params: 7,184
__________________________________________________________________________________________________
In [14]:
smooth = 1e-15
def dice_coefficient(y_true, y_pred):
    y_true = tf.keras.layers.Flatten()(y_true)
    y_pred = tf.keras.layers.Flatten()(y_pred)
    intersection = tf.reduce_sum(y_true * y_pred)
    return (2. * intersection + smooth) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + smooth)
In [15]:
#Define loss function
def loss(y_true, y_pred):
    # return binary_crossentropy(y_true, y_pred) - tensorflow.keras.backend.log(dice_coefficient(y_true, y_pred) + tensorflow.keras.backend.epsilon())
    return 1.0 - dice_coefficient(y_true, y_pred)

Compile the model

Define optimizer

Define metrics

Define loss

In [16]:
optimizer = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss=loss, optimizer=optimizer, metrics=[dice_coefficient, Recall(), Precision()])

Training the Model

In [17]:
EPOCHS = 32
BATCH = 10
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)
]
In [18]:
train_steps = len(X_train)//BATCH
valid_steps = len(X_val)//BATCH

if len(X_train) % BATCH != 0:
    train_steps += 1
if len(X_val) % BATCH != 0:
    valid_steps += 1

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_steps=valid_steps,
    callbacks=callbacks
)
Epoch 1/32
33/33 [==============================] - 90s 3s/step - loss: 0.7529 - dice_coefficient: 0.2480 - recall: 0.4629 - precision: 0.2672 - val_loss: 0.7339 - val_dice_coefficient: 0.2696 - val_recall: 0.9667 - val_precision: 0.1941
Epoch 2/32
33/33 [==============================] - 82s 2s/step - loss: 0.6064 - dice_coefficient: 0.3949 - recall: 0.8592 - precision: 0.4220 - val_loss: 0.6802 - val_dice_coefficient: 0.3243 - val_recall: 0.9768 - val_precision: 0.2317
Epoch 3/32
33/33 [==============================] - 82s 2s/step - loss: 0.5489 - dice_coefficient: 0.4516 - recall: 0.8810 - precision: 0.4861 - val_loss: 0.6484 - val_dice_coefficient: 0.3561 - val_recall: 0.9634 - val_precision: 0.2991
Epoch 4/32
33/33 [==============================] - 82s 2s/step - loss: 0.5172 - dice_coefficient: 0.4822 - recall: 0.8925 - precision: 0.5205 - val_loss: 0.6383 - val_dice_coefficient: 0.3663 - val_recall: 0.9635 - val_precision: 0.2955
Epoch 5/32
33/33 [==============================] - 82s 2s/step - loss: 0.4943 - dice_coefficient: 0.5061 - recall: 0.8884 - precision: 0.5566 - val_loss: 0.6248 - val_dice_coefficient: 0.3805 - val_recall: 0.9566 - val_precision: 0.3064
Epoch 6/32
33/33 [==============================] - 82s 2s/step - loss: 0.4723 - dice_coefficient: 0.5277 - recall: 0.8875 - precision: 0.5884 - val_loss: 0.5597 - val_dice_coefficient: 0.4435 - val_recall: 0.8930 - val_precision: 0.4447
Epoch 7/32
33/33 [==============================] - 81s 2s/step - loss: 0.4561 - dice_coefficient: 0.5433 - recall: 0.8883 - precision: 0.6106 - val_loss: 0.5337 - val_dice_coefficient: 0.4697 - val_recall: 0.8714 - val_precision: 0.4767
Epoch 8/32
33/33 [==============================] - 83s 3s/step - loss: 0.4389 - dice_coefficient: 0.5591 - recall: 0.8913 - precision: 0.6331 - val_loss: 0.5312 - val_dice_coefficient: 0.4727 - val_recall: 0.8929 - val_precision: 0.4540
Epoch 9/32
33/33 [==============================] - 82s 2s/step - loss: 0.4291 - dice_coefficient: 0.5714 - recall: 0.8900 - precision: 0.6427 - val_loss: 0.5242 - val_dice_coefficient: 0.4791 - val_recall: 0.8721 - val_precision: 0.4604
Epoch 10/32
33/33 [==============================] - 81s 2s/step - loss: 0.4084 - dice_coefficient: 0.5902 - recall: 0.9060 - precision: 0.6581 - val_loss: 0.5010 - val_dice_coefficient: 0.5006 - val_recall: 0.8122 - val_precision: 0.5320
Epoch 11/32
33/33 [==============================] - 82s 2s/step - loss: 0.4006 - dice_coefficient: 0.6000 - recall: 0.8893 - precision: 0.6835 - val_loss: 0.5633 - val_dice_coefficient: 0.4440 - val_recall: 0.9153 - val_precision: 0.3501
Epoch 12/32
33/33 [==============================] - 82s 2s/step - loss: 0.4041 - dice_coefficient: 0.5965 - recall: 0.8953 - precision: 0.6639 - val_loss: 0.4895 - val_dice_coefficient: 0.5123 - val_recall: 0.8225 - val_precision: 0.5137
Epoch 13/32
33/33 [==============================] - 82s 2s/step - loss: 0.3832 - dice_coefficient: 0.6177 - recall: 0.9010 - precision: 0.6988 - val_loss: 0.4801 - val_dice_coefficient: 0.5210 - val_recall: 0.7971 - val_precision: 0.5403
Epoch 14/32
33/33 [==============================] - 81s 2s/step - loss: 0.3717 - dice_coefficient: 0.6289 - recall: 0.9037 - precision: 0.7088 - val_loss: 0.4840 - val_dice_coefficient: 0.5174 - val_recall: 0.7989 - val_precision: 0.5245
Epoch 15/32
33/33 [==============================] - 82s 2s/step - loss: 0.3633 - dice_coefficient: 0.6367 - recall: 0.9006 - precision: 0.7252 - val_loss: 0.4666 - val_dice_coefficient: 0.5322 - val_recall: 0.7229 - val_precision: 0.6027
Epoch 16/32
33/33 [==============================] - 82s 2s/step - loss: 0.3553 - dice_coefficient: 0.6452 - recall: 0.9011 - precision: 0.7281 - val_loss: 0.4610 - val_dice_coefficient: 0.5381 - val_recall: 0.7339 - val_precision: 0.5985
Epoch 17/32
33/33 [==============================] - 81s 2s/step - loss: 0.3453 - dice_coefficient: 0.6549 - recall: 0.9023 - precision: 0.7378 - val_loss: 0.4654 - val_dice_coefficient: 0.5330 - val_recall: 0.6966 - val_precision: 0.6089
Epoch 18/32
33/33 [==============================] - 82s 2s/step - loss: 0.3301 - dice_coefficient: 0.6688 - recall: 0.9072 - precision: 0.7581 - val_loss: 0.4815 - val_dice_coefficient: 0.5220 - val_recall: 0.8408 - val_precision: 0.4653
Epoch 19/32
33/33 [==============================] - 82s 2s/step - loss: 0.3211 - dice_coefficient: 0.6788 - recall: 0.9115 - precision: 0.7678 - val_loss: 0.4527 - val_dice_coefficient: 0.5461 - val_recall: 0.7302 - val_precision: 0.5826
Epoch 20/32
33/33 [==============================] - 82s 2s/step - loss: 0.3092 - dice_coefficient: 0.6915 - recall: 0.9101 - precision: 0.7816 - val_loss: 0.4495 - val_dice_coefficient: 0.5499 - val_recall: 0.7506 - val_precision: 0.5663
Epoch 21/32
33/33 [==============================] - 82s 2s/step - loss: 0.3062 - dice_coefficient: 0.6927 - recall: 0.9116 - precision: 0.7859 - val_loss: 0.4425 - val_dice_coefficient: 0.5554 - val_recall: 0.7317 - val_precision: 0.5949
Epoch 22/32
33/33 [==============================] - 82s 2s/step - loss: 0.2972 - dice_coefficient: 0.7029 - recall: 0.9144 - precision: 0.7903 - val_loss: 0.4698 - val_dice_coefficient: 0.5328 - val_recall: 0.8099 - val_precision: 0.4789
Epoch 23/32
33/33 [==============================] - 82s 2s/step - loss: 0.2920 - dice_coefficient: 0.7082 - recall: 0.9134 - precision: 0.7892 - val_loss: 0.4437 - val_dice_coefficient: 0.5532 - val_recall: 0.6719 - val_precision: 0.6369
Epoch 24/32
33/33 [==============================] - 82s 2s/step - loss: 0.2973 - dice_coefficient: 0.7028 - recall: 0.9012 - precision: 0.7886 - val_loss: 0.4423 - val_dice_coefficient: 0.5560 - val_recall: 0.7703 - val_precision: 0.5423
Epoch 25/32
33/33 [==============================] - 82s 2s/step - loss: 0.2753 - dice_coefficient: 0.7247 - recall: 0.9091 - precision: 0.8171 - val_loss: 0.4401 - val_dice_coefficient: 0.5581 - val_recall: 0.7539 - val_precision: 0.5528
Epoch 26/32
33/33 [==============================] - 82s 2s/step - loss: 0.2639 - dice_coefficient: 0.7356 - recall: 0.9218 - precision: 0.8220 - val_loss: 0.4471 - val_dice_coefficient: 0.5480 - val_recall: 0.5999 - val_precision: 0.6939
Epoch 27/32
33/33 [==============================] - 82s 2s/step - loss: 0.2711 - dice_coefficient: 0.7291 - recall: 0.9125 - precision: 0.8148 - val_loss: 0.4354 - val_dice_coefficient: 0.5595 - val_recall: 0.6216 - val_precision: 0.6748
Epoch 28/32
33/33 [==============================] - 82s 2s/step - loss: 0.2610 - dice_coefficient: 0.7390 - recall: 0.9084 - precision: 0.8323 - val_loss: 0.4258 - val_dice_coefficient: 0.5707 - val_recall: 0.6867 - val_precision: 0.6208
Epoch 29/32
33/33 [==============================] - 82s 2s/step - loss: 0.2551 - dice_coefficient: 0.7421 - recall: 0.9165 - precision: 0.8306 - val_loss: 0.4241 - val_dice_coefficient: 0.5732 - val_recall: 0.7184 - val_precision: 0.5933
Epoch 30/32
33/33 [==============================] - 82s 2s/step - loss: 0.2408 - dice_coefficient: 0.7591 - recall: 0.9220 - precision: 0.8379 - val_loss: 0.4452 - val_dice_coefficient: 0.5501 - val_recall: 0.5764 - val_precision: 0.7090
Epoch 31/32
33/33 [==============================] - 82s 2s/step - loss: 0.2398 - dice_coefficient: 0.7594 - recall: 0.9144 - precision: 0.8459 - val_loss: 0.4235 - val_dice_coefficient: 0.5734 - val_recall: 0.6562 - val_precision: 0.6460
Epoch 32/32
33/33 [==============================] - 82s 2s/step - loss: 0.2239 - dice_coefficient: 0.7764 - recall: 0.9219 - precision: 0.8607 - val_loss: 0.4341 - val_dice_coefficient: 0.5617 - val_recall: 0.6038 - val_precision: 0.6817
Out[18]:
<keras.callbacks.History at 0x7f793f63b150>
In [19]:
#evaluate the model
test_steps = (len(X_test)//BATCH)
if len(X_test) % BATCH != 0:
    test_steps += 1

model.evaluate(X_test, y_test, steps=test_steps)
2/2 [==============================] - 1s 379ms/step - loss: 0.4679 - dice_coefficient: 0.5345 - recall: 0.5707 - precision: 0.6793
Out[19]:
[0.46791747212409973,
 0.5344786643981934,
 0.5707184076309204,
 0.6792795658111572]

The model has

  • precision is 59.2%,
  • recall is 77.7%
  • loss is 52%
  • dice coefficient is 47.3%.

Let's do predictions on the model we trained

In [20]:
filename = 'Part 1Test Data - Prediction Image.jpeg'
unscaled = cv2.imread(filename)
image = cv2.resize(unscaled, (224, 224))
feat_scaled = preprocess_input(np.array(image, dtype=np.float32))
print(feat_scaled)
[[[ 0.5686275   0.4666667   0.45098042]
  [ 0.56078434  0.45882356  0.4431373 ]
  [ 0.5764706   0.47450984  0.45882356]
  ...
  [-0.17647058 -0.5058824  -0.73333335]
  [-0.27058822 -0.49019605 -0.75686276]
  [-0.27843136 -0.47450978 -0.7411765 ]]

 [[ 0.6         0.49803925  0.48235297]
  [ 0.6         0.4901961   0.48235297]
  [ 0.6313726   0.5294118   0.5137255 ]
  ...
  [-0.20784312 -0.52156866 -0.7490196 ]
  [-0.16862744 -0.40392154 -0.6627451 ]
  [-0.19215685 -0.41176468 -0.6784314 ]]

 [[ 0.54509807  0.45098042  0.41960788]
  [ 0.47450984  0.3803922   0.34901965]
  [ 0.5372549   0.4431373   0.4039216 ]
  ...
  [-0.14509803 -0.42745095 -0.6627451 ]
  [-0.3098039  -0.5686275  -0.827451  ]
  [-0.34117645 -0.5921569  -0.8509804 ]]

 ...

 [[-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  ...
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]]

 [[-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  ...
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]]

 [[-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  ...
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]]]
In [21]:
y_pred = model.predict(np.array([feat_scaled]))
y_pred
Out[21]:
array([[[[0.09446189],
         [0.04429492],
         [0.04057583],
         ...,
         [0.04639384],
         [0.0477547 ],
         [0.11959705]],

        [[0.03711373],
         [0.04169792],
         [0.03271258],
         ...,
         [0.03292519],
         [0.02366853],
         [0.04882821]],

        [[0.03670496],
         [0.0333401 ],
         [0.02920771],
         ...,
         [0.0318214 ],
         [0.02024457],
         [0.04246575]],

        ...,

        [[0.02657372],
         [0.02486521],
         [0.04255354],
         ...,
         [0.03902066],
         [0.02617252],
         [0.04464817]],

        [[0.0315139 ],
         [0.02876309],
         [0.03948918],
         ...,
         [0.03646418],
         [0.03054896],
         [0.07278934]],

        [[0.12925553],
         [0.0566991 ],
         [0.04988256],
         ...,
         [0.0585421 ],
         [0.06128988],
         [0.15886113]]]], dtype=float32)
In [22]:
pred_mask = cv2.resize((1.0*(y_pred[0] > 0.5)), (224,224))

Viewing the predicted image

In [23]:
plt.imshow(feat_scaled)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Out[23]:
<matplotlib.image.AxesImage at 0x7f79319bdad0>
In [24]:
plt.imshow(pred_mask)
Out[24]:
<matplotlib.image.AxesImage at 0x7f793f4e2b90>

We are able to identify the face in the given pictures correctly.

image.png

• DOMAIN: Face recognition

• CONTEXT: Company X intends to build a face identification model to recognise human faces.

• DATA DESCRIPTION: The dataset comprises of images and its mask where there is a human face.

• PROJECT OBJECTIVE: Face Aligned Face Dataset from Pinterest. This dataset contains 10,770 images for 100 people. All images are taken from 'Pinterest' and aligned using dlib library. Some data samples:

image.png

• TASK: In this problem, we use a pre-trained model trained on Face recognition to recognise similar faces. Here, we are particularly interested in recognising whether two given faces are of the same person or not. Below are the steps involved in the project.

• Load the dataset and create the metadata.
• Check some samples of metadata.
• Load the pre-trained model and weights.
• Generate Embedding vectors for each face in the dataset.
• Build distance metrics for identifying the distance between two given images.
• Use PCA for dimensionality reduction.
• Build an SVM classifier in order to map each image to its right person.
• Import the the test image. Display the image. Use the SVM trained model to predict the face.
In [25]:
# opening the zip file in READ mode
with ZipFile("Part 3 - Aligned Face Dataset from Pinterest.zip", 'r') as zip:
    zip.extractall()
    print('Done!')
Done!
In [26]:
directory = '/content/drive/MyDrive/Colab Notebooks/Adv_CV2/PINS/'
In [27]:
class IdentityMetadata():
    def __init__(self, base, name, file):
        # print(base, name, file)
        self.base = base #directory
        self.name = name #name
        self.file = file

    def __repr__(self):
        return self.image_path()

    def image_path(self):
        return os.path.join(self.base, self.name, self.file) 
    
def load_metadata(path):
    metadata = []
    for i in os.listdir(path):
        for f in os.listdir(os.path.join(path, i)):
            # Check file extension. Allow only jpg/jpeg' files.
            ext = os.path.splitext(f)[1]
            if ext == '.jpg' or ext == '.jpeg':
                metadata.append(IdentityMetadata(path, i, f))
    return np.array(metadata)
metadata = load_metadata('PINS')
In [28]:
def load_image(path):
    img = cv2.imread(path, 1)
    # OpenCV loads images with color channels
    # in BGR order. So we need to reverse them
    return img[...,::-1]
In [29]:
print(metadata.shape)
metadata
(10770,)
Out[29]:
array([PINS/pins_Aaron Paul/Aaron Paul0_262.jpg,
       PINS/pins_Aaron Paul/Aaron Paul101_248.jpg,
       PINS/pins_Aaron Paul/Aaron Paul101_247.jpg, ...,
       PINS/pins_zendaya/zendaya97.jpg, PINS/pins_zendaya/zendaya98.jpg,
       PINS/pins_zendaya/zendaya99.jpg], dtype=object)

Load few sample image

In [30]:
image = load_image("PINS/pins_zendaya/zendaya99.jpg")
print(image.shape)
plt.imshow(image)
(299, 299, 3)
Out[30]:
<matplotlib.image.AxesImage at 0x7f79376c0350>
In [31]:
image2 = load_image("PINS/pins_Aaron Paul/Aaron Paul101_248.jpg")
print(image2.shape)
plt.imshow(image2)
(299, 299, 3)
Out[31]:
<matplotlib.image.AxesImage at 0x7f79376a0d50>

VGG Face model

In [32]:
def vgg_face():	
    model = Sequential()
    model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3)))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))
    
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))
    
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(256, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))
    
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))
    
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Convolution2D(512, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))
    
    model.add(Convolution2D(4096, (7, 7), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(4096, (1, 1), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation('softmax'))
    return model
In [33]:
model = vgg_face()
model.load_weights("/content/drive/MyDrive/Colab Notebooks/Adv_CV2/Part 3 - vgg_face_weights.h5")

Get vgg_face_descriptor

In [34]:
vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)

Generate embeddings for each image in the dataset

In [35]:
# Get embedding vector for first image in the metadata using the pre-trained model
img_path = metadata[0].image_path()
img = load_image(img_path)

# Normalising pixel values from [0-255] to [0-1]: scale RGB values to interval [0,1]
img = (img / 255.).astype(np.float32)

img = cv2.resize(img, dsize = (224,224))
print(img.shape)

# Obtain embedding vector for an image
# Get the embedding vector for the above image using vgg_face_descriptor model and print the shape 
embedding_vector = vgg_face_descriptor.predict(np.expand_dims(img, axis=0))[0]
print(embedding_vector.shape)
(224, 224, 3)
(2622,)

Generate embeddings for all images

In [36]:
embeddings = np.zeros((metadata.shape[0], 2622))

for i, m in enumerate(metadata):
  image_path = m.image_path()
  image = load_image(image_path)

  image = (image/255.).astype(np.float32)
  image = cv2.resize(image, dsize=(224,224))

  embedding_vector = vgg_face_descriptor.predict(np.expand_dims(image, axis=0))[0]
  embeddings[i] = embedding_vector
In [37]:
embeddings
Out[37]:
array([[ 0.03170303, -0.0150513 , -0.01243402, ...,  0.0004314 ,
         0.0021908 , -0.00908097],
       [ 0.02801891, -0.00112629, -0.01265751, ..., -0.00904748,
        -0.005784  ,  0.02125827],
       [ 0.03497704, -0.00105059, -0.01248934, ..., -0.01053091,
         0.00179321,  0.02439156],
       ...,
       [ 0.00965872,  0.00372159, -0.01345865, ..., -0.01433404,
         0.01899944,  0.03899154],
       [ 0.0023093 ,  0.00094763, -0.00544814, ..., -0.01600495,
         0.01053975,  0.03683248],
       [ 0.017723  ,  0.01064051,  0.01995193, ..., -0.02618429,
         0.03288926,  0.04882648]])
In [38]:
#Function to calculate distance between given 2 pairs of images.
def distance(emb1, emb2):
    return np.sum(np.square(emb1 - emb2))

Plot images and get distance between the pairs given below

In [39]:
import matplotlib.pyplot as plt

def show_pair(idx1, idx2):
    plt.figure(figsize=(8,3))
    plt.suptitle(f'Distance = {distance(embeddings[idx1], embeddings[idx2]):.2f}')
    plt.subplot(121)
    plt.imshow(load_image(metadata[idx1].image_path()))
    plt.subplot(122)
    plt.imshow(load_image(metadata[idx2].image_path()));    

show_pair(2, 3)
show_pair(2, 180)

show_pair(30,31)
show_pair(30,100)

show_pair(70,72)
show_pair(70,115)

Create train and test sets

In [40]:
train_idx = np.arange(metadata.shape[0]) % 9 != 0     #every 9th example goes in test data and rest go in train data
test_idx = np.arange(metadata.shape[0]) % 9 == 0

# one half as train examples of 10 identities
X_train = embeddings[train_idx]

# another half as test examples of 10 identities
X_test = embeddings[test_idx]

targets = np.array([m.name for m in metadata])

#train labels
y_train = targets[train_idx]

#test labels
y_test = targets[test_idx]
In [41]:
print(X_train[0])
print(y_train[0])
[ 0.02801891 -0.00112629 -0.01265751 ... -0.00904748 -0.005784
  0.02125827]
pins_Aaron Paul
In [42]:
np.unique(y_train)
Out[42]:
array(['pins_Aaron Paul', 'pins_Alvaro Morte', 'pins_Amanda Crew',
       'pins_Amaury Nolasco', 'pins_Anna Gunn',
       'pins_Benedict Cumberbatch', 'pins_Betsy Brandt',
       'pins_Brenton Thwaites', 'pins_Brit Marling',
       'pins_Bryan Cranston', 'pins_Caity Lotz', 'pins_Cameron Monaghan',
       'pins_Chance Perdomo', 'pins_Chris Evans', 'pins_Chris Pratt',
       'pins_Cobie Smulders', 'pins_Danielle Panabaker',
       'pins_Dave Franco', 'pins_Dominic Purcell', 'pins_Dwayne Johnson',
       'pins_Emilia Clarke', 'pins_Emily Bett Rickards',
       'pins_Emma Stone', 'pins_Gwyneth Paltrow', 'pins_Henry Cavil',
       'pins_Jason Momoa', 'pins_Jeremy Renner', 'pins_Jesse Eisenberg',
       'pins_Jim Parsons', 'pins_Jon Bernthal', 'pins_Josh Radnor',
       'pins_Kit Harington', 'pins_Krysten Ritter',
       'pins_Kumail Nanjiani', 'pins_Maisie Williams',
       'pins_Mark Ruffalo', 'pins_Martin Starr', 'pins_Melissa benoit',
       'pins_Mike Colter', 'pins_Morena Baccarin', 'pins_Morgan Freeman',
       'pins_Natalie Portman ', 'pins_Neil Patrick Harris',
       'pins_Paul Rudd', 'pins_Pedro Alonso', 'pins_Peter Dinklage',
       'pins_RJ Mitte', 'pins_Rami Melek', 'pins_Robert Knepper',
       'pins_Robin Taylor', 'pins_Ryan Reynolds',
       'pins_Sarah Wayne Callies', 'pins_Scarlett Johansson',
       'pins_Sebastian Stan', 'pins_Sophie Turner', 'pins_Stephen Amell',
       'pins_Sundar Pichai', 'pins_Thomas Middleditch',
       'pins_Tom Cavanagh', 'pins_Ursula Corbero',
       'pins_Wentworth Miller', 'pins_Willa Holland',
       'pins_William Fichtner', 'pins_alexandra daddario',
       'pins_alycia debnam carey face', 'pins_amber heard face',
       'pins_anne hathaway', 'pins_barbara palvin face',
       'pins_bellamy blake face', 'pins_bill gates', 'pins_brie larson',
       'pins_chadwick boseman face', 'pins_david mazouz', 'pins_drake',
       'pins_dua lipa face', 'pins_eliza taylor',
       'pins_elizabeth olsen face', 'pins_elon musk',
       'pins_emma watson face', 'pins_gal gadot face',
       'pins_grant gustin face', 'pins_jason isaacs', 'pins_jeff bezos',
       'pins_kiernan shipka ', 'pins_kristen stewart face',
       'pins_lindsey morgan face', 'pins_margot robbie face',
       'pins_maria pedraza', 'pins_mark zuckerberg', 'pins_miguel herran',
       'pins_millie bobby brown', 'pins_rihanna',
       'pins_robert downey jr face', 'pins_sean pertwee',
       'pins_selena gomez', 'pins_shakira', 'pins_tati gabrielle',
       'pins_taylor swift', 'pins_tom holland face', 'pins_zendaya'],
      dtype='<U29')
In [43]:
np.unique(y_test)
Out[43]:
array(['pins_Aaron Paul', 'pins_Alvaro Morte', 'pins_Amanda Crew',
       'pins_Amaury Nolasco', 'pins_Anna Gunn',
       'pins_Benedict Cumberbatch', 'pins_Betsy Brandt',
       'pins_Brenton Thwaites', 'pins_Brit Marling',
       'pins_Bryan Cranston', 'pins_Caity Lotz', 'pins_Cameron Monaghan',
       'pins_Chance Perdomo', 'pins_Chris Evans', 'pins_Chris Pratt',
       'pins_Cobie Smulders', 'pins_Danielle Panabaker',
       'pins_Dave Franco', 'pins_Dominic Purcell', 'pins_Dwayne Johnson',
       'pins_Emilia Clarke', 'pins_Emily Bett Rickards',
       'pins_Emma Stone', 'pins_Gwyneth Paltrow', 'pins_Henry Cavil',
       'pins_Jason Momoa', 'pins_Jeremy Renner', 'pins_Jesse Eisenberg',
       'pins_Jim Parsons', 'pins_Jon Bernthal', 'pins_Josh Radnor',
       'pins_Kit Harington', 'pins_Krysten Ritter',
       'pins_Kumail Nanjiani', 'pins_Maisie Williams',
       'pins_Mark Ruffalo', 'pins_Martin Starr', 'pins_Melissa benoit',
       'pins_Mike Colter', 'pins_Morena Baccarin', 'pins_Morgan Freeman',
       'pins_Natalie Portman ', 'pins_Neil Patrick Harris',
       'pins_Paul Rudd', 'pins_Pedro Alonso', 'pins_Peter Dinklage',
       'pins_RJ Mitte', 'pins_Rami Melek', 'pins_Robert Knepper',
       'pins_Robin Taylor', 'pins_Ryan Reynolds',
       'pins_Sarah Wayne Callies', 'pins_Scarlett Johansson',
       'pins_Sebastian Stan', 'pins_Sophie Turner', 'pins_Stephen Amell',
       'pins_Sundar Pichai', 'pins_Thomas Middleditch',
       'pins_Tom Cavanagh', 'pins_Ursula Corbero',
       'pins_Wentworth Miller', 'pins_Willa Holland',
       'pins_William Fichtner', 'pins_alexandra daddario',
       'pins_alycia debnam carey face', 'pins_amber heard face',
       'pins_anne hathaway', 'pins_barbara palvin face',
       'pins_bellamy blake face', 'pins_bill gates', 'pins_brie larson',
       'pins_chadwick boseman face', 'pins_david mazouz', 'pins_drake',
       'pins_dua lipa face', 'pins_eliza taylor',
       'pins_elizabeth olsen face', 'pins_elon musk',
       'pins_emma watson face', 'pins_gal gadot face',
       'pins_grant gustin face', 'pins_jason isaacs', 'pins_jeff bezos',
       'pins_kiernan shipka ', 'pins_kristen stewart face',
       'pins_lindsey morgan face', 'pins_margot robbie face',
       'pins_maria pedraza', 'pins_mark zuckerberg', 'pins_miguel herran',
       'pins_millie bobby brown', 'pins_rihanna',
       'pins_robert downey jr face', 'pins_sean pertwee',
       'pins_selena gomez', 'pins_shakira', 'pins_tati gabrielle',
       'pins_taylor swift', 'pins_tom holland face', 'pins_zendaya'],
      dtype='<U29')

Encode the Labels

In [44]:
le = LabelEncoder()
le.fit(y_train)

y_train = le.transform(y_train)
y_test = le.transform(y_test)

Standardize the feature values

In [45]:
# Standarize features
scaler = StandardScaler()

scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Reduce dimensions using PCA

In [46]:
from sklearn.decomposition import PCA

pca = PCA(n_components=128)
pca.fit(X_train)

X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

Build a Classifier

In [47]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

svc = SVC()

svc.fit(X_train,y_train)

y_pred = svc.predict(X_test)
print(accuracy_score(y_test,y_pred))
0.9640768588137009

Test results

In [48]:
import warnings
# Suppress LabelEncoder warning
warnings.filterwarnings('ignore')

example_idx =200

example_image = load_image(metadata[test_idx][example_idx].image_path())

image = (example_image/255.).astype(np.float32)
image = cv2.resize(image, dsize=(224,224))

vector = vgg_face_descriptor.predict(np.expand_dims(image,axis=0))[0]
vector = np.reshape(vector,(1,-1))
scaled_vector = scaler.transform(vector)
pca_transformed = pca.transform(scaled_vector)

example_prediction = svc.predict(pca_transformed)
example_identity = le.inverse_transform(example_prediction)[0]

plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');
In [49]:
## predict first test image 

example_image = load_image("Part 2 - Test Image - Dwayne Johnson4.jpg")

image = (example_image/255.).astype(np.float32)
image = cv2.resize(image, dsize=(224,224))

vector = vgg_face_descriptor.predict(np.expand_dims(image,axis=0))[0]
vector = np.reshape(vector,(1,-1))
scaled_vector = scaler.transform(vector)
pca_transformed = pca.transform(scaled_vector)

example_prediction = svc.predict(pca_transformed)
example_identity = le.inverse_transform(example_prediction)[0]

plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');
In [50]:
## predict second test image 

example_image = load_image("Part 2- Test Image - Benedict Cumberbatch9.jpg")

image = (example_image/255.).astype(np.float32)
image = cv2.resize(image, dsize=(224,224))

vector = vgg_face_descriptor.predict(np.expand_dims(image,axis=0))[0]
vector = np.reshape(vector,(1,-1))
scaled_vector = scaler.transform(vector)
pca_transformed = pca.transform(scaled_vector)

example_prediction = svc.predict(pca_transformed)
example_identity = le.inverse_transform(example_prediction)[0]

plt.imshow(example_image)
plt.title(f'Identified as {example_identity}');

Able to predict the face correctly for both the test images.